#############################################################
##############           Placebo Tests  ####################
###############     October 10, 2018     ##################
#######  Rerun December 17, 2022 

rm(list=ls())
library(foreign)
library(plyr)
library(readstata13)
library(multiwayvcov)
library(sandwich)
library(lmtest)
library(stargazer)
library(ggplot2)


#################################################
########### Multiplot Function #################
######   Source: http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/



multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
########################################################


data=read.csv("~/Dropbox/Personal Research 2017/replications/karn_nov16.csv")
names(data)
summary(data$NEAR_DIST_border1)
summary(data$border1)

###################################################
###Create Placebo Border to the North (-10 km to Princely State)

#calculate new distances

#border1
#mysore part
summary(data$NEAR_DIST_border1[data$border1==1])
data$distplacebo1[(data$border1==1)&!is.na(data$border1)]=
  data$NEAR_DIST_border1[(data$border1==1)&!is.na(data$border1)]+10000

summary(data$distplacebo1[data$border1==1])

#bombay part
data$distplacebo1[(data$border1==0)&!is.na(data$border1)
                  &(data$NEAR_DIST_border1<10000)]=
  10000-data$NEAR_DIST_border1[(data$border1==0)&!is.na(data$border1)
                               &(data$NEAR_DIST_border1<10000)]

data$distplacebo1[(data$border1==0)&!is.na(data$border1)
                  &(data$NEAR_DIST_border1>=10000)]=
  data$NEAR_DIST_border1[(data$border1==0)&!is.na(data$border1)
                         &(data$NEAR_DIST_border1>=10000)]-10000

summary(data$NEAR_DIST_border1[data$border1==0])
summary(data$distplacebo1[data$border1==0])


#create a new treatment variable
#old mysore and chunk of the bombay are in the treatment, 
#and the rest of bombay are in teh control group
data$borderplac1[data$border1==1]=1
data$borderplac1[(data$border1==0)&!is.na(data$border1)
                 &(data$NEAR_DIST_border1<10000)]=1
data$borderplac1[(data$border1==0)&!is.na(data$border1)
                 &(data$NEAR_DIST_border1>=10000)]=0
summary(data$borderplac1)



##### Distances #####

#Distance to Mysore-Bombay Border
rd10.mb=data[which(data$distplacebo1<10000),] #20 km

table(rd10.mb$borderplac1)


#######Linear Polynomial #####

#baseline bandwidth
#Mysore-Bombay
mys.health1=lm(health_binary~borderplac1+TOT_POP+
        TOT_SC+TOT_ST+Slope+TerrainRug+Latitude+Longitude, data=rd10.mb)
summary(mys.health1)
mys.health1.cl=cluster.vcov(mys.health1, rd10.mb$dist_name)
mys.health1.se=sqrt(diag(mys.health1.cl)) #cluster standard errors

mys.pucca1=lm(pucca_binary~borderplac1+TOT_POP+
        TOT_SC+TOT_ST+Slope+TerrainRug+Latitude+Longitude, data=rd10.mb)
summary(mys.pucca1)
mys.pucca1.cl=cluster.vcov(mys.pucca1, rd10.mb$dist_name)
mys.pucca1.se=sqrt(diag(mys.pucca1.cl)) #cluster standard errors


#############################
#### Create Placebo Border to the South (+10 km to the princely state)

#border1
#mysore part
summary(data$NEAR_DIST_border1[data$border1==0])
data$distplacebo1[(data$border1==0)&!is.na(data$border1)]=
  data$NEAR_DIST_border1[(data$border1==0)&!is.na(data$border1)]+10000

summary(data$distplacebo1[data$border1==0])

#bombay part
data$distplacebo1[(data$border1==1)&!is.na(data$border1)
                  &(data$NEAR_DIST_border1<10000)]=
  10000-data$NEAR_DIST_border1[(data$border1==1)&!is.na(data$border1)
                               &(data$NEAR_DIST_border1<10000)]

data$distplacebo1[(data$border1==1)&!is.na(data$border1)
                  &(data$NEAR_DIST_border1>=10000)]=
  data$NEAR_DIST_border1[(data$border1==1)&!is.na(data$border1)
                         &(data$NEAR_DIST_border1>=10000)]-10000

summary(data$NEAR_DIST_border1[data$border1==1])
summary(data$distplacebo1[data$border1==1])


#create a new treatment variable
#old mysore and chunk of the bombay are in the treatment, 
#and the rest of bombay are in teh control group

data$borderplac1[data$border1==0]=1
data$borderplac1[(data$border1==1)&!is.na(data$border1)
                 &(data$NEAR_DIST_border1<10000)]=1
data$borderplac1[(data$border1==1)&!is.na(data$border1)
                 &(data$NEAR_DIST_border1>=10000)]=0
summary(data$borderplac1)


##### Distances #####

#Distance to Mysore-Bombay Border
rd10.mb=data[which(data$distplacebo1<10000),] #20 km

table(rd10.mb$borderplac1)


##### Simple OLS with controls ######
## and linear polynomial of lat and long  ###


#baseline bandwidth
#Mysore-Bombay
mys.health2=lm(health_binary~borderplac1+TOT_POP+
        TOT_SC+TOT_ST+Slope+TerrainRug+Latitude+Longitude, data=rd10.mb)
summary(mys.health2)
mys.health2.cl=cluster.vcov(mys.health2, rd10.mb$dist_name)
mys.health2.se=sqrt(diag(mys.health2.cl)) #cluster standard errors

mys.pucca2=lm(pucca_binary~borderplac1+TOT_POP+
        TOT_SC+TOT_ST+Slope+TerrainRug+Latitude+Longitude, data=rd10.mb)
summary(mys.pucca2)
mys.pucca2.cl=cluster.vcov(mys.pucca2, rd10.mb$dist_name)
mys.pucca2.se=sqrt(diag(mys.pucca2.cl)) #cluster standard errors



######################################
#border2

#####Calculate new distances (-10 km from the princely state)

#hyd part
summary(data$NEAR_DIST_border2[data$border2==1])
data$distplacebo2[(data$border2==1)&!is.na(data$border2)]=
  data$NEAR_DIST_border2[(data$border2==1)&!is.na(data$border2)]+10000

summary(data$distplacebo2[data$border2==1])

#bombay part
data$distplacebo2[(data$border2==0)&!is.na(data$border2)
                  &(data$NEAR_DIST_border2<10000)]=
  10000-data$NEAR_DIST_border2[(data$border2==0)&!is.na(data$border2)
                               &(data$NEAR_DIST_border2<10000)]

data$distplacebo2[(data$border2==0)&!is.na(data$border2)
                  &(data$NEAR_DIST_border2>=10000)]=
  data$NEAR_DIST_border2[(data$border2==0)&!is.na(data$border2)
                         &(data$NEAR_DIST_border2>=10000)]-10000

summary(data$NEAR_DIST_border2[data$border2==0])
summary(data$distplacebo2[data$border2==0])

#create a new treatment variable
#old mysore and chunk of the bombay are in the treatment, 
#and the rest of bombay are in teh control group
data$borderplac2[data$border2==1]=1
data$borderplac2[(data$border2==0)&!is.na(data$border2)
                 &(data$NEAR_DIST_border2<10000)]=1
data$borderplac2[(data$border2==0)&!is.na(data$border2)
                 &(data$NEAR_DIST_border2>=10000)]=0
summary(data$borderplac2)


#Distance to Hyderabad-Bombay Border
rd10.hb=data[which(data$distplacebo2<10000),] #20 km
table(rd10.hb$borderplac2)

#hyderabad

hyd.health1=lm(health_binary~borderplac2+TOT_POP+
        TOT_SC+TOT_ST+Slope+TerrainRug+Latitude+Longitude, data=rd10.hb)
summary(hyd.health1)
hyd.health1.cl=cluster.vcov(hyd.health1, rd10.hb$dist_name)
hyd.health1.se=sqrt(diag(hyd.health1.cl)) #cluster standard errors

hyd.pucca1=lm(pucca_binary~borderplac2+TOT_POP+
        TOT_SC+TOT_ST+Slope+TerrainRug+Latitude+Longitude, data=rd10.hb)
summary(hyd.pucca1)
hyd.pucca1.cl=cluster.vcov(hyd.pucca1, rd10.hb$dist_name)
hyd.pucca1.se=sqrt(diag(hyd.pucca1.cl)) #cluster standard errors


####Calculate new distances (+10 km to the princely state)

#hyd part
summary(data$NEAR_DIST_border2[data$border2==0])
data$distplacebo2[(data$border2==0)&!is.na(data$border2)]=
  data$NEAR_DIST_border2[(data$border2==0)&!is.na(data$border2)]+10000

summary(data$distplacebo2[data$border2==0])

#bombay part
data$distplacebo2[(data$border2==1)&!is.na(data$border2)
                  &(data$NEAR_DIST_border2<10000)]=
  10000-data$NEAR_DIST_border2[(data$border2==1)&!is.na(data$border2)
                               &(data$NEAR_DIST_border2<10000)]

data$distplacebo2[(data$border2==1)&!is.na(data$border2)
                  &(data$NEAR_DIST_border2>=10000)]=
  data$NEAR_DIST_border2[(data$border2==1)&!is.na(data$border2)
                         &(data$NEAR_DIST_border2>=10000)]-10000

summary(data$NEAR_DIST_border2[data$border2==1])
summary(data$distplacebo2[data$border2==1])

#create a new treatment variable
#old mysore and chunk of the bombay are in the treatment, 
#and the rest of bombay are in teh control group
data$borderplac2[data$border2==0]=1
data$borderplac2[(data$border2==1)&!is.na(data$border2)
                 &(data$NEAR_DIST_border2<10000)]=1
data$borderplac2[(data$border2==1)&!is.na(data$border2)
                 &(data$NEAR_DIST_border2>=10000)]=0
summary(data$borderplac2)

#Distance to Hyderabad-Bombay Border
rd10.hb=data[which(data$distplacebo2<10000),] #20 km
table(rd10.hb$borderplac2)

#hyderabad
hyd.health2=lm(health_binary~borderplac2+TOT_POP+
        TOT_SC+TOT_ST+Slope+TerrainRug+Latitude+Longitude, data=rd10.hb)
summary(hyd.health2)
hyd.health2.cl=cluster.vcov(hyd.health2, rd10.hb$dist_name)
hyd.health2.se=sqrt(diag(hyd.health2.cl)) #cluster standard errors

hyd.pucca2=lm(pucca_binary~borderplac2+TOT_POP+
        TOT_SC+TOT_ST+Slope+TerrainRug+Latitude+Longitude, data=rd10.hb)
summary(hyd.pucca2)
hyd.pucca2.cl=cluster.vcov(hyd.pucca2, rd10.hb$dist_name)
hyd.pucca2.se=sqrt(diag(hyd.pucca2.cl)) #cluster standard errors




stargazer(mys.health1, mys.pucca1, mys.health2, mys.pucca2, 
          se=list(mys.health1.se, mys.pucca1.se, mys.health2.se, mys.pucca2.se), digits=3, 
          omit=c("TOT_POP", "TOT_SC", "TOT_ST", "Slope", "TerrainRug", "Latitude", "Longitude"), 
          dep.var.labels=c("Health Centers", "Paved Roads", "Health Centers", "Paved Roads"), 
          column.labels =c("-10 km", "+10 km"), column.separate = c(2,2),
          covariate.labels = c("Placebo Indirect Rule", "Constant"),
          add.lines = list(c("Controls","\\checkmark","\\checkmark","\\checkmark","\\checkmark")), 
          omit.stat = c("rsq", "f", "adj.rsq", "ser"))

stargazer(hyd.health1, hyd.pucca1, hyd.health2, hyd.pucca2, 
          se=list(hyd.health1.se, hyd.pucca1.se, hyd.health2.se, hyd.pucca2.se), digits=3, 
          omit=c("TOT_POP", "TOT_SC", "TOT_ST", "Slope", "TerrainRug", "Latitude", "Longitude"), 
          dep.var.labels=c("Health Centers", "Paved Roads", "Health Centers", "Paved Roads"), 
          column.labels =c("-10 km", "+10 km"), column.separate = c(2,2),
          covariate.labels = c("Placebo Indirect Rule", "Constant"),
          add.lines = list(c("Controls","\\checkmark","\\checkmark","\\checkmark","\\checkmark")), 
          omit.stat = c("rsq", "f", "adj.rsq", "ser"))











